{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Multi-Head Attention Sub-Layer.ipynb",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "946c90b82f7f46caa25c885668b75eab": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_4191af78535e4da8bb797690eff84e00",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_9ce3d57b96b64da0b15e3f3626bacb30",
              "IPY_MODEL_f8da2c91156342a69d9b262f4f993aa4"
            ]
          }
        },
        "4191af78535e4da8bb797690eff84e00": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "9ce3d57b96b64da0b15e3f3626bacb30": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_97370923218945c5b80ab468751ac8a7",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 230,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 230,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_0ba4a91f472e4c41ba80ab4025288446"
          }
        },
        "f8da2c91156342a69d9b262f4f993aa4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_15aa4b6f8f784c74804107be249126b9",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 230/230 [00:01&lt;00:00, 185B/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_edea457617ed4792aeeb65292019ceb4"
          }
        },
        "97370923218945c5b80ab468751ac8a7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "0ba4a91f472e4c41ba80ab4025288446": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "15aa4b6f8f784c74804107be249126b9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "edea457617ed4792aeeb65292019ceb4": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "aXACkAtfNpG0",
        "colab_type": "text"
      },
      "source": [
        "# The Attention Mechanism\n",
        "Copyright 2020, Denis Rothman, MIT License. Denis Rothman rewrote the reference notebook entirely in basic Python with no frameworks. Three more steps were added, and a Hugging Face transformer example was added. The original images were taken out, redesigned by Denis Rothman for educational purposes, and inserted in the book descriptions of the multi-attention sub-layer.\n",
        "\n",
        "[The Reference Colaboratory Notebook was written by Manuel Romero](https://colab.research.google.com/drive/1rPk3ohrmVclqhH7uQ7qys4oznDdAhpzF)\n",
        "\n",
        "[A Medium article was written by Raimi Karim](https://towardsdatascience.com/illustrated-self-attention-2d627e33b20a)"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "veRoFjFRNXwJ",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import numpy as np\n",
        "from scipy.special import softmax"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "JLe9lWCJNogW",
        "colab_type": "code",
        "outputId": "733e039b-343e-4161-9919-19b3a1ec130f",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 90
        }
      },
      "source": [
        "print(\"Step 1: Input : 3 inputs, d_model=4\")\n",
        "x =np.array([[1.0, 0.0, 1.0, 0.0],   # Input 1\n",
        "             [0.0, 2.0, 0.0, 2.0],   # Input 2\n",
        "             [1.0, 1.0, 1.0, 1.0]])  # Input 3\n",
        "print(x)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Step 1: Input : 3 inputs, d_model=4\n",
            "[[1. 0. 1. 0.]\n",
            " [0. 2. 0. 2.]\n",
            " [1. 1. 1. 1.]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "JZImwtHPN91V",
        "colab_type": "code",
        "outputId": "07706940-e200-4956-b957-fe9681139d0d",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 126
        }
      },
      "source": [
        "print(\"Step 2: weights 3 dimensions x d_model=4\")\n",
        "print(\"w_query\")\n",
        "w_query =np.array([[1, 0, 1],\n",
        "                   [1, 0, 0],\n",
        "                   [0, 0, 1],\n",
        "                   [0, 1, 1]])\n",
        "print(w_query)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Step 2: weights 3 dimensions x d_model=4\n",
            "w_query\n",
            "[[1 0 1]\n",
            " [1 0 0]\n",
            " [0 0 1]\n",
            " [0 1 1]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "7kRBS7MUOFgV",
        "colab_type": "code",
        "outputId": "8b0bcc03-88b1-4e8d-a483-dacc91ffa9ee",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 108
        }
      },
      "source": [
        "print(\"w_key\")\n",
        "w_key =np.array([[0, 0, 1],\n",
        "                 [1, 1, 0],\n",
        "                 [0, 1, 0],\n",
        "                 [1, 1, 0]])\n",
        "print(w_key)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "w_key\n",
            "[[0 0 1]\n",
            " [1 1 0]\n",
            " [0 1 0]\n",
            " [1 1 0]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Napm2VtkOIEN",
        "colab_type": "code",
        "outputId": "7331eb08-64d5-4a36-eeef-0a0a556f130b",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 108
        }
      },
      "source": [
        "print(\"w_value\")\n",
        "w_value = np.array([[0, 2, 0],\n",
        "                    [0, 3, 0],\n",
        "                    [1, 0, 3],\n",
        "                    [1, 1, 0]])\n",
        "print(w_value)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "w_value\n",
            "[[0 2 0]\n",
            " [0 3 0]\n",
            " [1 0 3]\n",
            " [1 1 0]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "JqapIgfDOQ7d",
        "colab_type": "code",
        "outputId": "fd610d7a-968a-47e6-d614-40ad03c1d172",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 108
        }
      },
      "source": [
        "print(\"Step 3: Matrix multiplication to obtain Q,K,V\")\n",
        "\n",
        "print(\"Queries: x * w_query\")\n",
        "Q=np.matmul(x,w_query)\n",
        "print(Q)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Step 3: Matrix multiplication to obtain Q,K,V\n",
            "Queries: x * w_query\n",
            "[[1. 0. 2.]\n",
            " [2. 2. 2.]\n",
            " [2. 1. 3.]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "NmfMln1Wmv73",
        "colab_type": "code",
        "outputId": "065b63ba-7584-4302-97cd-d5e1765470ed",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 108
        }
      },
      "source": [
        "print(\"Step 3: Matrix multiplication to obtain Q,K,V\")\n",
        "\n",
        "print(\"Keys: x * w_key\")\n",
        "K=np.matmul(x,w_key)\n",
        "print(K)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Step 3: Matrix multiplication to obtain Q,K,V\n",
            "Keys: x * w_key\n",
            "[[0. 1. 1.]\n",
            " [4. 4. 0.]\n",
            " [2. 3. 1.]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "v3Asv-8mOWkN",
        "colab_type": "code",
        "outputId": "2ec71310-0486-46f4-d9f5-d12a1a6ad0e6",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 90
        }
      },
      "source": [
        "print(\"Values: x * w_value\")\n",
        "V=np.matmul(x,w_value)\n",
        "print(V)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Values: x * w_value\n",
            "[[1. 2. 3.]\n",
            " [2. 8. 0.]\n",
            " [2. 6. 3.]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "gfgRAHUuOp5c",
        "colab_type": "code",
        "outputId": "ad02f055-11e0-4b9a-eb15-b66e4846c95e",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 90
        }
      },
      "source": [
        "print(\"Step 4: Scaled Attention Scores\")\n",
        "k_d=1   #square root of k_d=3 rounded down to 1 for this example\n",
        "attention_scores = (Q @ K.transpose())/k_d\n",
        "print(attention_scores)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Step 4: Scaled Attention Scores\n",
            "[[ 2.  4.  4.]\n",
            " [ 4. 16. 12.]\n",
            " [ 4. 12. 10.]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "hg2t6KuNOjzM",
        "colab_type": "code",
        "outputId": "c0610f91-cd1d-4b0f-b5ce-f6445481186a",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 90
        }
      },
      "source": [
        "print(\"Step 5: Scaled softmax attention_scores for each vector\")\n",
        "attention_scores[0]=softmax(attention_scores[0])\n",
        "attention_scores[1]=softmax(attention_scores[1])\n",
        "attention_scores[2]=softmax(attention_scores[2])\n",
        "print(attention_scores[0])\n",
        "print(attention_scores[1])\n",
        "print(attention_scores[2])"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Step 5: Scaled softmax attention_scores for each vector\n",
            "[0.06337894 0.46831053 0.46831053]\n",
            "[6.03366485e-06 9.82007865e-01 1.79861014e-02]\n",
            "[2.95387223e-04 8.80536902e-01 1.19167711e-01]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "R4Es7A7NOvjD",
        "colab_type": "code",
        "outputId": "b86060fe-1292-47c5-93f6-ddeeca1bfb62",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 199
        }
      },
      "source": [
        "print(\"Step 6: attention value obtained by score1/k_d * V\")\n",
        "print(V[0])\n",
        "print(V[1])\n",
        "print(V[2])\n",
        "print(\"Attention 1\")\n",
        "attention1=attention_scores[0].reshape(-1,1)\n",
        "attention1=attention_scores[0][0]*V[0]\n",
        "print(attention1)\n",
        "\n",
        "print(\"Attention 2\")\n",
        "attention2=attention_scores[0][1]*V[1]\n",
        "print(attention2)\n",
        "\n",
        "print(\"Attention 3\")\n",
        "attention3=attention_scores[0][2]*V[2]\n",
        "print(attention3)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Step 6: attention value obtained by score1/k_d * V\n",
            "[1. 2. 3.]\n",
            "[2. 8. 0.]\n",
            "[2. 6. 3.]\n",
            "Attention 1\n",
            "[0.06337894 0.12675788 0.19013681]\n",
            "Attention 2\n",
            "[0.93662106 3.74648425 0.        ]\n",
            "Attention 3\n",
            "[0.93662106 2.80986319 1.40493159]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "uBDKhaCvOzXj",
        "colab_type": "code",
        "outputId": "138901d8-0aa9-4db9-b8b1-76ad557e6688",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 54
        }
      },
      "source": [
        "print(\"Step 7: summed the results to create the first line of the output matrix\")\n",
        "attention_input1=attention1+attention2+attention3\n",
        "print(attention_input1)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Step 7: summed the results to create the first line of the output matrix\n",
            "[1.93662106 6.68310531 1.59506841]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "iEjgRcqHO4ik",
        "colab_type": "code",
        "outputId": "675a154b-a305-4c0c-e314-353541abfd3e",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 635
        }
      },
      "source": [
        "print(\"Step 8: Step 1 to 7 for inputs 1 to 3\")\n",
        "#We assume we have 3 results with learned weights (they were not trained in this example)\n",
        "#We assume we are implementing the original Transformer paper. We will have 3 results of 64 dimensions each\n",
        "attention_head1=np.random.random((3, 64))\n",
        "print(attention_head1)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Step 8: Step 1 to 7 for inputs 1 to 3\n",
            "[[0.05750794 0.25966685 0.80912647 0.00841755 0.53786959 0.05089332\n",
            "  0.17938191 0.91091697 0.20593063 0.27634727 0.33869867 0.25488968\n",
            "  0.88673807 0.56544205 0.69075114 0.56069125 0.92579273 0.46042461\n",
            "  0.78471374 0.93064241 0.99626239 0.13662306 0.72892312 0.52327088\n",
            "  0.90128711 0.28245531 0.05630861 0.55857421 0.50998676 0.59709355\n",
            "  0.40038745 0.70580749 0.18971837 0.78544634 0.35815199 0.57527984\n",
            "  0.38283035 0.94917395 0.25450774 0.85725663 0.27262613 0.5720429\n",
            "  0.38092713 0.34721503 0.38857267 0.50218029 0.74035216 0.37789311\n",
            "  0.12812721 0.42074447 0.39534834 0.4927362  0.65353466 0.86485487\n",
            "  0.22989766 0.87239043 0.64613354 0.89034403 0.29338559 0.1671029\n",
            "  0.1675619  0.70683457 0.03683821 0.37657364]\n",
            " [0.08308343 0.01529261 0.34000535 0.48559272 0.25036425 0.98195061\n",
            "  0.72015388 0.03838282 0.18674587 0.33203929 0.82965726 0.6962791\n",
            "  0.49038184 0.97126469 0.25373185 0.18486967 0.38352481 0.68254099\n",
            "  0.01014604 0.51217341 0.17219508 0.14178547 0.74892979 0.12190071\n",
            "  0.0090985  0.09704158 0.70447804 0.21374912 0.72523093 0.89713875\n",
            "  0.28817021 0.56472583 0.59136866 0.7711216  0.78839121 0.03607145\n",
            "  0.33438564 0.99970048 0.80579864 0.79923327 0.57124039 0.64183951\n",
            "  0.11464931 0.703289   0.64033748 0.5799896  0.14488077 0.90946673\n",
            "  0.4189947  0.99825172 0.28607413 0.6801013  0.16240732 0.25219133\n",
            "  0.30470031 0.30292756 0.15999459 0.52230381 0.82012623 0.33586634\n",
            "  0.25613996 0.60354742 0.26006038 0.23281006]\n",
            " [0.37977727 0.7429604  0.38837932 0.18434243 0.84440271 0.53955069\n",
            "  0.40121556 0.83114666 0.48845808 0.58768546 0.4097926  0.29445373\n",
            "  0.22750019 0.9520429  0.99964437 0.57829693 0.32369595 0.60769326\n",
            "  0.76116892 0.14857116 0.07462658 0.01199289 0.37147371 0.80177111\n",
            "  0.60845313 0.33410248 0.06017335 0.447363   0.31500924 0.95988807\n",
            "  0.41506716 0.33740287 0.38991258 0.23478571 0.57808465 0.48520973\n",
            "  0.48241035 0.35030686 0.90598744 0.1296871  0.57966373 0.98736092\n",
            "  0.43859306 0.5358377  0.25181342 0.0195783  0.51178364 0.26981021\n",
            "  0.04674047 0.97762416 0.72747288 0.75616534 0.68105477 0.06914679\n",
            "  0.14054312 0.42816012 0.66792325 0.03168237 0.68685758 0.43487164\n",
            "  0.08064005 0.23444144 0.60360253 0.21423994]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "QI50dkZ1O630",
        "colab_type": "code",
        "outputId": "7d467842-f837-4e41-e099-534549b6fc05",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 54
        }
      },
      "source": [
        "print(\"Step 9: We assume we have trained the 8 heads of the attention sub-layer\")\n",
        "z0h1=np.random.random((3, 64))\n",
        "z1h2=np.random.random((3, 64))\n",
        "z2h3=np.random.random((3, 64))\n",
        "z3h4=np.random.random((3, 64))\n",
        "z4h5=np.random.random((3, 64))\n",
        "z5h6=np.random.random((3, 64))\n",
        "z6h7=np.random.random((3, 64))\n",
        "z7h8=np.random.random((3, 64))\n",
        "print(\"shape of one head\",z0h1.shape,\"dimension of 8 heads\",64*8)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Step 9: We assume we have trained the 8 heads of the attention sub-layer\n",
            "shape of one head (3, 64) dimension of 8 heads 512\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "3n87LE92_Puf",
        "colab_type": "code",
        "outputId": "55d00415-ebea-43a6-b4c5-ff13e02c3052",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 90
        }
      },
      "source": [
        "print(\"Step 10: Concatenation of heads 1 to 8 to obtain the original 8x64=512 output dimension of the model\")\n",
        "output_attention=np.hstack((z0h1,z1h2,z2h3,z3h4,z4h5,z5h6,z6h7,z7h8))\n",
        "print(output_attention)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Step 10: Concantenation of heads 1 to 8 to obtain the original 8x64=512 ouput dimension of the model\n",
            "[[0.46950893 0.88546586 0.47615937 ... 0.08285802 0.16577096 0.61094461]\n",
            " [0.31638247 0.24246402 0.30390966 ... 0.42283366 0.62127905 0.64414042]\n",
            " [0.1922683  0.7017995  0.60116595 ... 0.20012387 0.16264044 0.93645276]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "PJLl4Jf3fPLh",
        "colab_type": "text"
      },
      "source": [
        "And now with Hugging Face in one line!"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "CZIRvcRmfTPb",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "#@title Transformer Installation\n",
        "!pip -qq install transformers"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "cNwLYc-SfXdF",
        "colab_type": "code",
        "outputId": "d1314cc6-74d6-45cf-b8d6-0a903e58ac60",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 85,
          "referenced_widgets": [
            "946c90b82f7f46caa25c885668b75eab",
            "4191af78535e4da8bb797690eff84e00",
            "9ce3d57b96b64da0b15e3f3626bacb30",
            "f8da2c91156342a69d9b262f4f993aa4",
            "97370923218945c5b80ab468751ac8a7",
            "0ba4a91f472e4c41ba80ab4025288446",
            "15aa4b6f8f784c74804107be249126b9",
            "edea457617ed4792aeeb65292019ceb4"
          ]
        }
      },
      "source": [
        "#@title Retrieve pipeline of modules and choose English to French translation\n",
        "from transformers import pipeline\n",
        "translator = pipeline(\"translation_en_to_fr\")\n",
        "#One line of code!\n",
        "print(translator(\"It is easy to translate languages with transformers\", max_length=40))"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "946c90b82f7f46caa25c885668b75eab",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=230.0, style=ProgressStyle(description_…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\n",
            "[{'translation_text': 'Il est facile de traduire des langues avec des transformateurs.'}]\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
}